<?php

/**
 * @file
 * Home of the CCloudFeedsHTTPFetcher and related classes.
 */

/**
 * Fetcher result that downloads its URL with a configurable User-Agent.
 *
 * Instances are created by CCloudFeedsHTTPFetcher when fetching a source.
 */
class CCloudFeedsHTTPFetcherResult extends FeedsHTTPFetcherResult {

  /**
   * User-Agent value handed to the HTTP request helper.
   *
   * @var string
   */
  protected $userAgent;

  /**
   * Overrides FeedsHTTPFetcherResult::getRaw().
   *
   * The first call downloads $this->url through ccloud_http_request_get() and
   * stores the response body in $this->raw; later calls reuse the stored body.
   *
   * @return
   *   The stored response body passed through sanitizeRaw().
   *
   * @throws Exception
   *   When the response code is not one of 200 through 206.
   */
  public function getRaw() {
    // Already downloaded: skip the request entirely.
    if (isset($this->raw)) {
      return $this->sanitizeRaw($this->raw);
    }

    module_load_include('inc', 'ccloud_feeds', 'libraries/ccloud_http_request');
    $response = ccloud_http_request_get($this->url, NULL, NULL, $this->acceptInvalidCert, $this->timeout, $this->userAgent);

    $accepted_codes = array(200, 201, 202, 203, 204, 205, 206);
    if (in_array($response->code, $accepted_codes)) {
      $this->raw = $response->data;
      return $this->sanitizeRaw($this->raw);
    }

    $replacements = array(
      '@url' => $this->url,
      '!code' => $response->code,
    );
    throw new Exception(t('Download of @url failed with code !code.', $replacements));
  }

  /**
   * Returns the User-Agent used for the download.
   *
   * @return string
   *   The value last given to setUserAgent(), or NULL if none was set.
   */
  public function getUserAgent() {
    return $this->userAgent;
  }

  /**
   * Sets the User-Agent used for the download.
   *
   * @param string $user_agent
   *   The User-Agent value to pass to the HTTP request helper.
   */
  public function setUserAgent($user_agent) {
    $this->userAgent = $user_agent;
  }

}

/**
 * Fetches data via HTTP.
 *
 * Extends FeedsHTTPFetcher with a custom User-Agent, "one time" sources, a
 * per-feed-node override of the source URL, conversion of the downloaded
 * document to UTF-8, and detection of a "next page" link through XPath.
 */
class CCloudFeedsHTTPFetcher extends FeedsHTTPFetcher {

  /**
   * Overrides parent::configDefaults().
   *
   * @return array
   *   The parent defaults plus 'one_time_source', 'user_agent' and
   *   'next_source_xpath'.
   */
  public function configDefaults() {
    $defaults = parent::configDefaults();
    $defaults['one_time_source'] = FALSE;
    $defaults['user_agent'] = '';
    $defaults['next_source_xpath'] = '';

    return $defaults;
  }

  /**
   * Overrides parent::configForm().
   *
   * Adds the three settings declared in configDefaults() to the 'advanced'
   * group of the parent form.
   */
  public function configForm(&$form_state) {
    $form = parent::configForm($form_state);

    $form['advanced']['user_agent'] = array(
      '#type'          => 'textfield',
      '#title'         => t('Custom User-Agent'),
      '#description'   => t('Set the user-agent header to emulate a browser or device.'),
      '#default_value' => $this->config['user_agent'],
    );

    $form['advanced']['one_time_source'] = array(
      '#type'          => 'checkbox',
      '#title'         => t('One time source'),
      '#description'   => t('Enabled to skip import of this feed item if it has been imported already.'),
      '#default_value' => $this->config['one_time_source'],
    );

    $form['advanced']['next_source_xpath'] = array(
      '#type'          => 'textfield',
      '#title'         => t('Xpath of link to next page'),
      '#description'   => t('Import content of next pages.'),
      '#default_value' => $this->config['next_source_xpath'],
    );

    return $form;
  }

  /**
   * Builds the fetcher result for a source.
   *
   * Copied from FeedsHTTPFetcher->fetch() and customized: the source URL is
   * replaced by the feed node's field_next_feed_source value when that field
   * is filled in, and the configured User-Agent is passed to the result.
   *
   * @param FeedsSource $source
   *   The source being fetched.
   *
   * @return FeedsFetcherResult
   *   A plain FeedsFetcherResult when PubSubHubbub delivered data, otherwise a
   *   CCloudFeedsHTTPFetcherResult for the source URL.
   */
  private function _fetch(FeedsSource $source) {
    $source_config = $source->getConfigFor($this);

    // Only load the feed node when the source is actually attached to one.
    $feed_node = !empty($source->feed_nid) ? node_load($source->feed_nid) : FALSE;
    if ($feed_node && !empty($feed_node->field_next_feed_source[LANGUAGE_NONE][0]['value'])) {
      $source_config['source'] = $feed_node->field_next_feed_source[LANGUAGE_NONE][0]['value'];
    }

    if ($this->config['use_pubsubhubbub'] && ($raw = $this->subscriber($source->feed_nid)->receive())) {
      return new FeedsFetcherResult($raw);
    }
    $fetcher_result = new CCloudFeedsHTTPFetcherResult($source_config['source']);
    // When request_timeout is empty, the global value is used.
    $fetcher_result->setTimeout($this->config['request_timeout']);
    $fetcher_result->setAcceptInvalidCert($this->config['accept_invalid_cert']);
    $fetcher_result->setUserAgent($this->config['user_agent']);
    return $fetcher_result;
  }

  /**
   * Implements FeedsFetcher::fetch().
   *
   * Downloads the source, converts it to UTF-8 when a charset meta tag asks
   * for it, and, when 'next_source_xpath' is configured, sets the fetch state
   * progress to 0.5 if the expression matches the document or to
   * FEEDS_BATCH_COMPLETE if it does not.
   *
   * @param FeedsSource $source
   *   The source being fetched.
   *
   * @return FeedsFetcherResult
   *   The original fetcher result, or a new FeedsFetcherResult wrapping the
   *   UTF-8 converted document.
   */
  public function fetch(FeedsSource $source) {
    $fetcher_result = $this->_fetch($source);
    $raw = $fetcher_result->getRaw();

    // Swap in the converted document only when the conversion succeeded, so a
    // failed iconv() call can no longer replace the download with FALSE.
    $converted = $this->convertToUtf8($raw);
    if ($converted !== NULL) {
      $raw = $converted;
      $fetcher_result = new FeedsFetcherResult($raw);
    }

    if (!empty($this->config['next_source_xpath'])) {
      $state = $source->state(FEEDS_FETCH);
      if ($this->hasXpathMatch($raw, $this->config['next_source_xpath'])) {
        $state->progress = 0.5;
      }
      else {
        $state->progress = FEEDS_BATCH_COMPLETE;
      }
    }

    return $fetcher_result;
  }

  /**
   * Converts an HTML document to UTF-8 based on its charset meta tag.
   *
   * @param string $raw
   *   The downloaded document.
   *
   * @return string|null
   *   The converted document with its charset meta tag rewritten, or NULL
   *   when no charset meta tag triggers a conversion or iconv() fails.
   */
  private function convertToUtf8($raw) {
    if (!is_string($raw) || $raw === '') {
      return NULL;
    }

    // Both patterns are confined to a single tag with [^>]. The previous
    // ".+?" / ".*\>" forms ran up to the last ">" on the line, so on a line
    // holding several tags everything after the meta tag was part of the
    // match and was dropped when the match was replaced below.
    if (preg_match('/<meta[^>]+?charset=([-\w]+)[^>]*>/i', $raw, $matches) && strtolower($matches[1]) !== 'utf-8') {
      // Unquoted charset value that is not UTF-8: keep the tag, swap charset.
      $meta = str_replace('charset=' . $matches[1], 'charset=utf-8', $matches[0]);

      // NOTE(review): this removes the first XHTML-style charset meta tag.
      // When that is the very tag matched above, the replacement below finds
      // nothing and the document ends up without a charset declaration.
      // Behavior kept as it was - confirm whether that is intended.
      if (preg_match('/<meta[^>]+charset\s*=\s*["\']?([\w-]+)"\s+\/>/i', $raw, $matches_extra)) {
        $raw = str_replace($matches_extra[0], '', $raw);
      }
    }
    elseif (preg_match('/<meta[^>]+?charset="([-\w]+)"[^>]*>/i', $raw, $matches)) {
      // Quoted charset value: replace the tag with an http-equiv declaration.
      $meta = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
    }
    else {
      return NULL;
    }

    // iconv() returns FALSE (with a notice/warning, silenced here) for an
    // unknown charset or a failed conversion.
    $converted = @iconv($matches[1], 'utf-8//IGNORE', str_replace($matches[0], $meta, $raw));

    return $converted === FALSE ? NULL : $converted;
  }

  /**
   * Tells whether an XPath expression matches anything in an HTML document.
   *
   * @param string $html
   *   The HTML document.
   * @param string $expression
   *   The XPath expression to evaluate.
   *
   * @return bool
   *   TRUE when the query returns at least one node, FALSE otherwise,
   *   including when the document is empty or cannot be parsed.
   */
  private function hasXpathMatch($html, $expression) {
    // loadHTML() rejects an empty document, so there is nothing to query.
    if (!is_string($html) || $html === '') {
      return FALSE;
    }

    // Collect parser errors internally: real-world HTML is rarely valid and
    // would otherwise raise a PHP warning for every markup problem.
    $previous = libxml_use_internal_errors(TRUE);

    $found = FALSE;
    $dom = new DOMDocument();
    if ($dom->loadHTML($html)) {
      $xpath = new DOMXPath($dom);
      $nodes = $xpath->query($expression);
      $found = $nodes && $nodes->length > 0;
    }

    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    return $found;
  }

  /**
   * Overrides parent::importPeriod().
   *
   * @param FeedsSource $source
   *   The source whose import period is requested.
   *
   * @return
   *   FEEDS_SCHEDULE_NEVER for a one time source that has an import timestamp,
   *   otherwise whatever the parent implementation returns.
   *
   * @see FeedsSource::scheduleImport()
   */
  public function importPeriod(FeedsSource $source) {
    // Skip further imports of a one time source once it has been imported.
    // @todo: check if the previous import is successful.
    if ($this->config['one_time_source'] && is_numeric($source->imported) && $source->imported > 0) {
      return FEEDS_SCHEDULE_NEVER;
    }

    // Defer to the parent so any period override it defines still applies;
    // previously this method fell through and always returned NULL here.
    return parent::importPeriod($source);
  }

}